library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
library(RColorBrewer)
library(ggthemes)
library(here)
## here() starts at /Users/Nosheal/Documents/CMU/State_Manufacturing
library(ggrepel)
library(lubridate)
library(ggnewscale)
library(readxl)
library(patchwork)
library(ggridges)
#library(htmltools)
#library(htmlwidgets)
#library(RJSONIO)
title_theme <- theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 18),
title = element_text(size = 20))
axis_theme <- theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 18))
We were able to find a new, far more detailed database of state use of CRF. These data are taken from a government website, Pandemic Oversight, which provides a detailed breakdown of specific awards related to the Coronavirus Relief Fund.
We start by importing and cleaning data frames, beginning with a state overview. We also load in our previously constructed and cleaned data about manufacturing economic activity.
project_details <- read.csv(here("CRF/Project Details.csv"))
prime_recipients <- read.csv(here("CRF/prime_recipients.csv"))
project_finaces <- read.csv(here("CRF/project_finances.csv"))
state_summary <- read_xlsx(here("CRF/state_summary.xlsx"))
manf_data <- readRDS(here("State Data/manf_clean.RDS"))
We start by looking at how much the federal government reports state recieved.
crf_payments <- read_xlsx(here("CRF/crf_payments.xlsx"))
crf_feds <- crf_payments %>%
filter(!is.na(Amount)) %>%
filter(str_detect(State_Local, "Payment to |Total allocation")) %>%
pivot_wider(names_from = State_Local, values_from = Amount) %>%
mutate(`Total allocation` = case_when(
is.na(`Total allocation`) ~ `Total allocation and payment to the state`,
TRUE ~ `Total allocation`),
`Payment to the state` = case_when(
is.na(`Payment to the state`) ~ `Total allocation and payment to the state`,
TRUE ~ `Payment to the state`)) %>%
select(state_name = State, state_payment = `Payment to the state`, state_total_allocation = `Total allocation`) %>%
left_join(states)
## Joining with `by = join_by(state_name)`
The first column gives us how much money went directly to states, and the second the total amount that states received.
We can then compare this against what states report they spend, across specific spending categories.
For our state overview, we separate out county payments from state payments, noting that we don’t do this completely correctly for Alaska, and create a data frame that contains 1) the total amount the state received from the federal government, and 2) how the state spent the funds across a few categories of spending.
We start with the total amount of money that states report allocating, separating out the payment that went to the states from the payment that went to counties within the state.
state_sum <- crf_feds %>%
left_join(states) %>%
ggplot() +
geom_col(aes(x = reorder(state_abbr, state_payment), y = state_total_allocation), fill = brewer.pal(7, "Set3")[4], color = "black", alpha = 0.8) +
geom_col(aes(x = reorder(state_abbr, state_payment), y = state_payment), fill = "lightgrey", color = "black", alpha = 0.8) +
scale_y_continuous(labels = scales::unit_format(unit = "$B", scale = 1e-9)) +
coord_flip() +
labs(y = "Total $ Received from Cornavirus Relief Fund (CRF)", x = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_name, state_abbr)`
state_sum
From here, we turn to the aggregate database of how states report spending the funds allocated to them.
We start by cleaning this dataset and examining the distribution of how staets and counties report spending their money.
state_summary_long <- state_summary %>%
pivot_longer(cols = -c(1:5), names_to = "spending_category", values_to = "spending_amount") %>%
mutate(city_county = case_when(
str_detect(`Prime Recipient`, "CITY OF|TOWN|COUNTY") ~ "county_spending",
str_detect(`Prime Recipient`, "CITY OF|TOWN|COUNTY", negate = TRUE ) ~ "state_spending"),
city_county = case_when(
State == "AK" & str_detect(`Prime Recipient`, "DEPT", negate = TRUE) ~ "not_state",
TRUE ~ city_county )) %>%
rename(payment_amount = `Payment Amount`)
Curiously, we see that some of the values that states report are less than 0. It is unclear exactly what is going on with these payments, so we will leave them alone for now, but create a dataframe to store their values in case we need to check them again.
state_crf_neg <- state_summary_long %>%
filter(spending_amount < 0)
We now take our data and sum over each of the spending categories, separating out how counties and cities report spending their allocations, from how states report spending their allocations.
state_crf_clean <- state_summary_long %>%
group_by(State, spending_category, city_county, payment_amount) %>%
reframe(total_spending = sum(spending_amount), number_of_recipients = n()) %>%
group_by(State, spending_category, city_county) %>%
reframe(total_payment = max(payment_amount), total_spending = sum(total_spending)) %>%
pivot_wider(names_from = city_county, values_from = total_spending) %>%
rename(state_abbr = State) %>%
filter(!is.na(state_abbr))
With this data, we can now get a count of the total amount of money that each state and county received, as well as the categories that they spent their money on. First, we compare the total amount that states report spending with the total amount that states were allocated.
state_crf_clean %>%
select(state_abbr, total_payment) %>%
distinct() %>%
group_by(state_abbr) %>%
mutate(state_pay = max(total_payment),
state_county = case_when(
state_pay > total_payment ~ "City and Counties",
TRUE ~ "State"
)) %>%
filter(!is.na(state_abbr)) %>%
left_join(states) %>%
left_join(crf_feds) %>%
filter(!is.na(state_abbr)) %>%
mutate(diff = state_payment - state_pay) %>%
view()
## Joining with `by = join_by(state_abbr)`
## Joining with `by = join_by(state_abbr, state_name)`
We see that in general, the amount that states report spending in total match with the total payment to the state; however, the amount that counties report spending do not add to the total state allocation.
Using state provided data about how states spend their money, we can examine a breakdown of how funds are allocated across distinct spending categories.
unknown_spend <- function(data){
data %>%
mutate(spending_percent = state_spending/state_payment) %>%
group_by(state_abbr) %>%
mutate(state_pay = max(total_payment),
state_county = case_when(
state_pay >= total_payment ~ "City and Counties",
TRUE ~ "State"
)) %>%
filter(!is.na(state_abbr), state_county == "State") %>%
mutate(state_spend = sum(state_spending, na.rm = TRUE)) %>%
mutate(unknown = state_payment - state_spend,
spending_percent = unknown / total_payment) %>%
select(state_abbr, state_spending = unknown, state_payment, spending_percent, state_pay) %>%
distinct() %>%
mutate(spending_category = "Unknown")
}
Because the federal dataset does not have information about US territories we create another dataframe to hold these values.
crf_territories <- state_crf_clean %>%
left_join(states) %>%
anti_join(crf_feds) %>%
mutate(state_payment = total_payment)
## Joining with `by = join_by(state_abbr)`
## Joining with `by = join_by(state_abbr, state_name)`
crf_territory_sum <- crf_territories %>%
select(state_abbr, state_payment) %>%
mutate(state_total_allocation = state_payment) %>%
distinct()
crf_feds_all <- crf_feds %>%
bind_rows(crf_territory_sum)
state_crf_spending <- state_crf_clean %>%
left_join(states) %>%
left_join(crf_feds, .) %>%
bind_rows(crf_territories) %>%
mutate(spending_percent = state_spending/state_payment,
state_county = case_when(
state_payment < total_payment ~ "City and Counties",
TRUE ~ "State"
)) %>%
group_by(state_abbr) %>%
filter(!is.na(state_abbr), state_county == "State", !is.na(state_county)) %>%
mutate(state_spend = sum(state_spending, na.rm = TRUE))
## Joining with `by = join_by(state_abbr)`
## Joining with `by = join_by(state_name, state_abbr)`
cat_scale <- c(brewer.pal(12, "Paired"), brewer.pal(3, "Spectral")[1:2], brewer.pal(3, "Set1")[1], brewer.pal(3, "Dark2"), "lightgrey", brewer.pal(3, "Set2")[2])
state_crf_cats <- state_crf_spending %>%
ggplot() +
geom_col(aes(x = reorder(state_abbr, state_payment), y = spending_percent, fill = spending_category), color = "black", alpha = 0.8) +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = cat_scale)+
coord_flip() +
labs(y = "Total $ Received from Cornavirus Relief Fund (CRF)", x = "", fill = "Spending Category") +
theme_bw() +
axis_theme
graph_2_nolab <- state_crf_cats +
guides(fill = "none")
state_crf_cats
## Warning: Removed 944 rows containing missing values (`position_stack()`).
We see that not all of the allocated state funds are accounted for in a specific spending categories, even including “All items listed above”.
In addition, while some PPE related expenditures are outside of the PPE spending category, we can focus on this category to get a sense of how much of their total allocation states spent on PPE.
state_crf_spending %>%
filter(spending_category == "Personal protective equipment") %>%
ggplot() +
geom_col(aes(x = reorder(state_abbr, state_payment), y = spending_percent), color = "black", alpha = 0.8, fill = brewer.pal(9, "Set1")[1]) +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
theme_bw() +
labs(y = "Percent Spent on Personal Protective Equipment", x = "") +
axis_theme
## Warning: Removed 39 rows containing missing values (`position_stack()`).
We see that in general, states vary substantially in the percent of their allocated funds that the spend on personal protective equipment expenditures; however, this aggregate picture is still not perfectly clear.
We can then compare this aggregate summary of spending against a summation over a more granular approach using individual contract data.
We start with looking at the prime recipients of contracts to states.
prime_states <- prime_recipients %>%
mutate(state_abbr = str_sub(Prime.recipient, -6)) %>%
mutate(Prime.recipient = str_trim(str_remove_all(str_remove(Prime.recipient, state_abbr), "[:punct:]"))) %>%
mutate(state_abbr = str_trim(str_remove_all(state_abbr, "[:punct:]"))) %>%
left_join(states) %>%
rename(sub_award_amount_all = Sub.award.amount)
## Joining with `by = join_by(state_abbr)`
prime_states %>%
group_by(state_abbr) %>%
reframe(dollars_spent = sum(sub_award_amount_all)) %>%
left_join(crf_feds_all) %>%
mutate(spend_perc_state = dollars_spent / state_payment, spec_perc_all = dollars_spent / state_total_allocation) %>%
view()
## Joining with `by = join_by(state_abbr)`
In general, we see that the total amount of funds awarded to sub-contracts roughly matches the total amount awarded to states, and where it exceeds the total amount allocated to states it appears to come from money to tribal areas.
We also see some states award contracts to out-of-state sub-recipients.
We then move to the next level pass, seeking to understand how prime recipients pass money along to sub-contractees, focusing on the dataset of sub-awarded contracts to see how much money we can trace down to particular recipients and how this compares to the total amount that states were allocated.
project_finances_clean <- project_finaces %>%
select(Prime.recipient, Sub.recipient, Award.number, Award.type, Location.info, County, Sub.award.amount, Money.spent.to.date) %>%
filter(County != "") %>%
mutate(state_abbr_sub = substr(County, 1, 2)) %>%
group_by(Prime.recipient, Sub.recipient, Award.number, County, state_abbr_sub) %>%
reframe(Sub.award.amount = sum(Sub.award.amount, na.rm = TRUE), dollars_spent = sum(Money.spent.to.date, na.rm = TRUE))
Because of the way our data is structured, we need to be careful about how we join our two datasets together. We start by matching based on the Prime recipient, the sub recipient, and the dollar amount awarded.
primes_subs <- project_finances_clean %>%
mutate(Prime.recipient = str_trim(str_remove_all(Prime.recipient, "[:punct:]"))) %>%
group_by(Prime.recipient, Sub.recipient) %>%
mutate(sub_award_amount_all = sum(Sub.award.amount)) %>%
left_join(prime_states) %>%
rename(state_abbr_prime = state_abbr) %>%
mutate(state_abbr_prime = case_when(
str_detect(Prime.recipient, "HOMELAND SECURITY EMERGENCY PREPAREDNESS LA GOVERNOR") ~ "LA",
TRUE ~ state_abbr_prime
),
state_name = case_when(
str_detect(Prime.recipient, "HOMELAND SECURITY EMERGENCY PREPAREDNESS LA GOVERNOR") ~ "Louisiana",
TRUE ~ state_abbr_prime
))
## Joining with `by = join_by(Prime.recipient, Sub.recipient,
## sub_award_amount_all)`
primes_subs %>%
filter(str_detect(Sub.recipient, "INSIGHT PUBLIC"), str_detect(Prime.recipient, "WISCONSIN")) %>%
filter(is.na(state_abbr_prime)) %>%
select(-c(state_abbr_prime, state_name)) %>%
left_join(prime_states %>%
rename(sub_test = sub_award_amount_all)) %>%
mutate(test = sub_award_amount_all - sub_test) %>%
view()
## Joining with `by = join_by(Prime.recipient, Sub.recipient)`
However, there are some instances that did not match perfectly. We now go in and solve this issue. We go into our dataset and filter out all the entries that did not match.
primes_subs_holder <- primes_subs %>%
filter(!is.na(state_abbr_prime))
We then focus on the unmatched entries in our data. We see that there appears to be an error in matching the exact sub-award amounts across the database, which we can mostly fix with rounding.
state_prime_supp <- primes_subs %>%
filter(is.na(state_abbr_prime)) %>%
select(-c(state_abbr_prime, state_name)) %>%
rename(sub_award_amount_exact = sub_award_amount_all) %>%
mutate(sub_award_amount_est = round(sub_award_amount_exact, 0)) %>%
left_join(prime_states %>%
mutate(sub_award_amount_est = round(sub_award_amount_all, 0))) %>%
rename(state_abbr_prime = state_abbr)
## Joining with `by = join_by(Prime.recipient, Sub.recipient,
## sub_award_amount_est)`
However, we still have some entities that are unmatched.
state_prime_supp %>%
filter(is.na(state_abbr_prime)) %>%
select(-c(state_abbr_prime, sub_award_amount_all, state_name)) %>%
select(Prime.recipient, Sub.recipient) %>%
distinct() %>%
view()
We can see that we can fix this with brute force somewhat; however, there are still some unmatched entries, due to the fact that there are a few counties that have the same name across states and contracted out to the same sub-awardees.
state_fix_0 <- state_prime_supp %>%
filter(!is.na(state_abbr_prime))
state_fix_1 <- state_prime_supp %>%
filter(is.na(state_abbr_prime)) %>%
select(-c(state_abbr_prime, sub_award_amount_all, state_name)) %>%
mutate(state_abbr_prime = case_when(
str_detect(Prime.recipient, "OREGON") ~ "OR",
str_detect(Prime.recipient, "DANE") ~ "WI",
str_detect(Prime.recipient, "DU PAGE") ~ "IL",
str_detect(Prime.recipient, "NEW MEXICO") ~ "NM",
str_detect(Prime.recipient, "INDIANA") ~ "IN",
str_detect(Prime.recipient, "MAINE") ~ "ME",
str_detect(Prime.recipient, "INDIANA") ~ "IN",
str_detect(Prime.recipient, "PR TREASURY DEPARTMENT") ~ "PR",
str_detect(Prime.recipient, "TREASURER CALIFORNIA STATE") ~ "CA"
))
state_fix_2 <- state_fix_1 %>%
filter(is.na(state_abbr_prime)) %>%
select(c(1:5)) %>%
left_join(prime_states) %>%
rename(state_abbr_prime = state_abbr) %>%
group_by(Prime.recipient, Sub.recipient, state_abbr_prime) %>%
mutate(tal = seq(n())) %>%
filter(tal == 1) %>%
select(-c(tal))
## Joining with `by = join_by(Prime.recipient, Sub.recipient)`
## Warning in left_join(., prime_states): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 11610 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
We can now merge our datasets back together to fix them.
primes_subs_clean <- primes_subs_holder %>%
bind_rows(state_fix_0) %>%
bind_rows(state_fix_1 %>%
filter(!is.na(state_abbr_prime))) %>%
bind_rows(state_fix_2) %>%
mutate(in_out = case_when(
state_abbr_prime == state_abbr_sub ~ "in_state",
state_abbr_prime != state_abbr_sub ~ "out_state",
)) %>%
select(c(1:6, 9:10, 13, sub_award_amount_all))
We can now compare how much money states spent on within-state contracts compared to out of state contracts, as well as the total amount that they spent relative to what they were allocated.
mark_unique <- function(data, var) {
data %>%
group_by({{ var }}) %>%
mutate(tal = seq(n()),
first_count = case_when(tal == 1 ~ 1)) %>%
select(-c(tal)) %>%
ungroup()
}
primes_subs_clean <- primes_subs_clean %>%
mark_unique(Sub.recipient)
primes_subs_clean %>%
filter(first_count == 1) %>%
nrow()
## [1] 76832
We were able to identify 76,832 of the reported 90K unique Sub.recipients. However, we know that we will not be able to match all of these sub recipients to actual awards.
We compare the total amount of money we can trace from prime recipients to sub recipients to the total money that the states report.
state_awardees <- primes_subs_clean %>%
group_by(state_abbr_prime) %>%
reframe(dollars_awarded = sum(Sub.award.amount, na.rm = TRUE), sub_awards = sum(first_count, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(crf_feds_all) %>%
mutate(perc_state = dollars_awarded / state_payment, perc_all = dollars_awarded / state_total_allocation)
## Joining with `by = join_by(state_abbr)`
state_awards_all <- state_awardees %>%
ggplot() +
geom_bar(aes(perc_all), fill = brewer.pal(5, "Set3")[5], alpha = 0.8, stat = "bin", color = "black") +
labs(x = "Percent of Total State Allocation Traceable to Prime and Sub-Recipients", y = "Number of States") +
scale_x_continuous(labels = scales::percent) +
theme_bw() +
axis_theme
state_awards_all
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We see that more than 20 states report 100% of their allocated state funding down to prime and sub awardees. However, there are a number of states that fall short.
state_awardees %>%
arrange(perc_all) %>%
view()
state_awardees %>%
ggplot() +
geom_col(aes(x = state_total_allocation, y = reorder(state_abbr, state_total_allocation)), color = "black", fill = brewer.pal(8, "Dark2")[6]) +
geom_col(aes(x = dollars_awarded, y = reorder(state_abbr, state_total_allocation)), color = "black", fill = brewer.pal(8, "Set1")[2]) +
scale_x_continuous(labels = scales::unit_format(unit = "$B", scale = 1e-9)) +
labs(x = "Total Dollars to State, and Prime/Sub Recipients", y = "") +
theme_dark() +
axis_theme
For robustness, we also compare how dollars awarded matches against state direct payments. We see that the database of prime and sub awardees clearly includes contracts made by cities and counties.
primes_subs_clean %>%
group_by(state_abbr_prime) %>%
reframe(dollars_awarded = sum(Sub.award.amount, na.rm = TRUE), sub_awards = sum(first_count, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(crf_feds_all) %>%
mutate(perc_state = dollars_awarded / state_payment, perc_all = dollars_awarded / state_total_allocation) %>%
ggplot() +
geom_bar(aes(perc_state), fill = "lightgrey", alpha = 0.8, stat = "bin", color = "black") +
labs(x = "Percent of State Payments Traceable to Prime and Sub-Recipients", y = "Number of States") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We can also examine what percent of state funds went to in-state compared to out of state awardees.
state_dom_not <- primes_subs_clean %>%
group_by(state_abbr_prime, in_out) %>%
mutate(sub_award_amount_all = case_when(
is.na(sub_award_amount_all) ~ Sub.award.amount,
TRUE ~ sub_award_amount_all
)) %>%
reframe(state_spent = sum(sub_award_amount_all)) %>%
filter(!is.na(state_abbr_prime)) %>%
group_by(state_abbr_prime) %>%
mutate(tot_spend = sum(state_spent),
spend_perc = state_spent / tot_spend) %>%
ggplot() +
geom_col(aes(y = reorder(state_abbr_prime, tot_spend), x = spend_perc, fill = in_out)) +
scale_x_continuous(labels = scales::percent) +
labs(x = "Percent of Funds Spent In or Out of State", y = "") +
guides(fill = "none") +
theme_bw() +
axis_theme
state_dom_not
We see that states vary in the amount of funds they spend in state, compared to out of state.
We now move from here to matching individual awards and their descriptions. Unfortunately, because of inconsistencies with award numbers, we can’t consistently match individual projects with the states that awarded those projects. We will see how far we can get and then focus on individual PPE contract data.
We start with some basic cleaning.
project_clean_1 <- project_details %>%
filter(Spending.category != "Select")
One of the main problems in this data are awards without a specific number, as well as those awarded to multiple participants. We cannot match anything about awards that went to multiple recipients because of a lack of details.
project_clean_2 <- project_clean_1 %>%
filter(Sub.recipient != "MULTIPLE RECIPIENTS")
We focus on the majority of our data, for which we have award numbers. The process of matching individual contracts to the state that awarded them is tedious and only partially complete. However, we hypothesize that the completeness of any given state’s data is correlated with their state capacity and not completely random.
project_clean_3 <- project_clean_2 %>%
filter(Award.number != "") %>%
rename(sub_award_amnt = Sub.award.amount)
We first see how many entries we can match just by award number and sub-recipient alone.
For some of the entries in our database of prime to sub contracts, we have repeats of the same sub recipient and award number. These entries can be matched by the total dollar amount of the contract awarded, which we begin with.
project_match_dupes <- primes_subs_clean %>%
filter(Award.number != "") %>%
group_by(Sub.recipient, Award.number) %>%
mutate(tal = seq(n()),
max_tal = max(tal)) %>%
group_by(Sub.recipient, Award.number) %>%
filter(max_tal > 1, Sub.award.amount > 0 ) %>%
left_join(
project_clean_3 %>%
group_by(Sub.recipient, Award.number, Project.description, Spending.category) %>%
reframe(Sub.award.amount = sum(sub_award_amnt))
)
## Joining with `by = join_by(Sub.recipient, Award.number, Sub.award.amount)`
We then take these entries out of our database of projects.
project_clean_4 <- project_clean_3 %>%
anti_join(project_match_dupes %>%
select(Sub.recipient, Award.number))
## Joining with `by = join_by(Sub.recipient, Award.number)`
Now, focusing on the remaining entries, we match by sub-recipient and award number.
primes_subs_no_dupes <- primes_subs_clean %>%
filter(Award.number != "") %>%
group_by(Sub.recipient, Award.number) %>%
mutate(tal = seq(n()),
max_tal = max(tal)) %>%
group_by(Sub.recipient, Award.number) %>%
filter(max_tal == 1)
project_match_1 <- project_clean_4 %>%
left_join(primes_subs_no_dupes)
## Joining with `by = join_by(Sub.recipient, Award.number)`
no_match1 <- project_match_1 %>%
filter(is.na(state_abbr_prime)) %>%
select(c(1:9))
We also see that only 200 entries were not matched with our initial database of prime and sub awardees. We can examine how many of these contracts might be related to PPE expenditures.
ppe_words <- c("ppe", "protective", "mask", "respirator", "nN95")
no_match_ppe <- no_match1 %>%
mutate(proj_lower = tolower(Project.description),
ppe_match = case_when(
str_detect(proj_lower, paste(ppe_words, collapse = "|")) | Spending.category == "Personal Protective Equipment" ~ 1
)) %>%
filter(ppe_match == 1)
From these, we filter out contracts that do not have a sub-recipient match in our database of awardees.
no_match_list <- no_match_ppe %>%
anti_join(primes_subs_clean %>%
select(Sub.recipient))
## Joining with `by = join_by(Sub.recipient)`
no_match_fix1 <- no_match_ppe %>%
filter(!Sub.recipient %in% no_match_list$Sub.recipient) %>%
left_join(primes_subs_clean %>% ungroup() %>% select(c(Sub.recipient, state_abbr_sub, state_abbr_prime, in_out)) %>% distinct() %>% filter(str_detect(Sub.recipient, "FASTENAL", negate = TRUE))) %>%
mutate(state_abbr_sub =
case_when(str_detect(Sub.recipient, "FASTENAL") ~ "MO",
TRUE ~ state_abbr_sub),
state_abbr_prime = case_when(str_detect(Sub.recipient, "FASTENAL") ~ "MO",
TRUE ~ state_abbr_prime),
in_out = case_when(str_detect(Sub.recipient, "FASTENAL") ~ "in_state",
TRUE ~ in_out))
## Joining with `by = join_by(Sub.recipient)`
We can now join our databases together for our first complete database of matched contracts, acknowledging that we missed some contracts because of a lack of matches.
project_match_2 <- project_match_1 %>%
filter(!is.na(state_abbr_prime)) %>%
bind_rows(no_match_fix1)
We now turn to awards without an award number to see how many of these we can match.
In adddition, we have a substantial amount of awards with no award number.
no_award_num <- project_clean_2 %>%
filter(Award.number == "")
For these awards, we see that there are no project descriptions; however, these projects are separated across multiple spending categories, including a substantial amount of awards to our area of main interest: Personal Protective Equipment.
no_award_num %>%
group_by(Spending.category) %>%
count()
## # A tibble: 20 × 2
## # Groups: Spending.category [20]
## Spending.category n
## <chr> <int>
## 1 "" 74
## 2 "Administrative Expenses" 1009
## 3 "Budgeted Personnel and Services Diverted to a Substantially Different… 1106
## 4 "COVID-19 Testing and Contact Tracing" 1020
## 5 "Economic Support (Other than Small Business, Housing, and Food Assist… 5338
## 6 "Expenses Associated with the Issuance of Tax Anticipation Notes" 21
## 7 "Facilitating Distance Learning" 2517
## 8 "Food Programs" 1049
## 9 "Housing Support" 584
## 10 "Improve Telework Capabilities of Public Employees" 1438
## 11 "Items Not Listed Above" 3994
## 12 "Medical Expenses" 1117
## 13 "Nursing Home Assistance" 3400
## 14 "Other COVID-19-Related Expenses" 1
## 15 "Payroll for Public Health and Safety Employees" 994
## 16 "Personal Protective Equipment" 2503
## 17 "Public Health Expenses" 3636
## 18 "Small Business Assistance" 3582
## 19 "Unemployment Benefits" 98
## 20 "Workers Compensation" 105
As such, we try and match by sub-recipient as well as the total amount of money that that recipient received. To do this, we have to round our sub award amounts to a common degree.
We see that in certain cases we will have to use the sub award amount to match, and in certain cases the money spent to date amount to match.
We start by focusing on values that have a positive sub award amount.
no_award_pos <- no_award_num %>%
filter(Sub.award.amount > 0) %>%
mutate(sub_award_est = round(Sub.award.amount, 3),
sub_award_ext = Sub.award.amount) %>%
select(-c(Sub.award.amount))
We now see how many of these we can match.
no_award_match_1 <- no_award_pos %>%
ungroup() %>%
left_join(primes_subs_clean %>%
ungroup() %>%
filter(Award.number == "", sub_award_amount_all > 0) %>%
select(-c(Award.number)) %>%
mutate(sub_award_est = round(Sub.award.amount, 3)))
## Joining with `by = join_by(Sub.recipient, sub_award_est)`
## Warning in left_join(., primes_subs_clean %>% ungroup() %>% filter(Award.number == : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15193 of `x` matches multiple rows in `y`.
## ℹ Row 7940 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
no_award_clean_1 <- no_award_match_1 %>%
filter(!is.na(state_abbr_prime)) %>%
group_by(Sub.recipient, sub_award_est) %>%
mutate(tal = seq(n())) %>%
filter(tal == 1)
We see that we’ve matched 20,973 out of 23,232 observations. For these other observations, we can see how far we get using the Money Spent to Date variable.
no_award_match_2 <- no_award_match_1 %>%
filter(is.na(state_abbr_prime)) %>%
select(c(1:11)) %>%
mutate(sub_award_est = round(Money.spent.to.date, 3)) %>%
left_join(primes_subs_clean %>%
ungroup() %>%
filter(Award.number == "", sub_award_amount_all > 0) %>%
select(-c(Award.number)) %>%
mutate(sub_award_est = round(Sub.award.amount, 3))) %>%
rename(money_spent_est = sub_award_est)
## Joining with `by = join_by(Sub.recipient, sub_award_est)`
With this, we can get another 400 or so matches, leaving us still a few thousand short.
no_award_clean_2 <- no_award_clean_1 %>%
bind_rows(no_award_match_2 %>%
filter(!is.na(state_abbr_prime)))
We can now take the remainder of the contracts that we have and match to entries that have unique states.
unique_subs <- primes_subs_clean %>%
select(Prime.recipient, Sub.recipient, state_abbr_sub, state_abbr_prime) %>%
distinct() %>%
group_by(Sub.recipient) %>%
mutate(tal = seq(n())) %>%
mutate(max_tal = max(tal, na.rm = TRUE))
no_award_match_3 <- no_award_pos %>%
filter(!Sub.recipient %in% unique(no_award_clean_2$Sub.recipient)) %>%
select(-c(sub_award_est)) %>%
left_join(unique_subs %>%
filter(max_tal == 1))
## Joining with `by = join_by(Sub.recipient)`
This gives us another 1,500 matched entries, leaving us just over 200 unmatched entries. For now, we will discard these entries.
no_award_match_3 %>%
filter(is.na(state_abbr_prime)) %>%
view()
We now have to clean up our variables for the dollars spent on each contract.
no_award_clean_3 <- no_award_clean_2 %>%
bind_rows(no_award_match_3 %>%
filter(!is.na(state_abbr_prime)))
We note that our “best” variable to measure spending in this data is the “sub_award_ext” variable. We simplify the dataset to keep this variable as well as a few other for potential usefulness later.
no_award_final <- no_award_clean_3 %>%
ungroup() %>%
select(Sub.recipient, Prime.recipient, Project.description, state_abbr_sub, state_abbr_prime, Spending.category, dollars_spent = Money.spent.to.date, Sub.award.amount, sub_award_amount_all, dollars_awarded = sub_award_ext)
Before we join this data back with our cleaned and matched project number data. Our “best” variables are the Sub.award.amount variable from the prime-sub recipient database, and the Money Spent to Date variable from the individual sub-recipients to award numbers database.
We simplify to be one row per sub recipient and award number. For our project data with award numbers, we see that each award number is further broken down into sub-spending categories. The total amount of the award is tracked in the “Sub-Award Amount” column, while the money spent to date on a specific category is tracked in the “Money Spent to Date” column.
project_match_clean <- project_match_2 %>%
group_by(Sub.recipient, Award.number, Project.description, Spending.category) %>%
reframe(dollars_awarded = max(Sub.award.amount), dollars_spent = sum(Money.spent.to.date, na.rm = TRUE), state_abbr_prime, in_out) %>%
distinct()
perc_check <- function(data, var){
data %>%
left_join(state_awardees) %>%
mutate(check_perc = {{ var }} / state_total_allocation)
}
We see that we are able to match a highly heterogenous set of contracts back to the state that awarded them. This result calls into question our earlier assumption of non-randomness in the amount of contracts that are able to be matched. Nonetheless, we press on.
project_match_2 %>%
group_by(state_abbr_prime) %>%
reframe(dollars_spent = sum(Money.spent.to.date, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
perc_check(dollars_spent) %>% view()
## Joining with `by = join_by(state_abbr)`
project_match_clean %>%
group_by(state_abbr_prime) %>%
reframe(state_dollars = sum(dollars_awarded, na.rm = TRUE), dollars_spent = sum(dollars_spent, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
perc_check(dollars_spent) %>%
view()
## Joining with `by = join_by(state_abbr)`
We can then append to this dataset our contracts with no award numbers that we were still able to match.
project_match_final <- project_match_clean %>%
bind_rows(no_award_final)
We now have a dataset of 172507 individual contracts that we can analyze.
We start with a high level summary of state spending to compare to our previous estimates and data.
We know that we were not able to match all of our prime to sub awardee data with our data on individual contracts. As such, we begin with a description of how much we are off by for each state.
state_allocation_check <- project_match_final %>%
group_by(state_abbr_prime) %>%
reframe(spent_dollars = sum(dollars_spent, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
mutate(perc_contract_spent = spent_dollars / state_total_allocation) %>%
ggplot() +
geom_point(shape = 21, aes(x = state_total_allocation, y = perc_contract_spent, fill = perc_contract_spent), size = 5, color = "black", alpha = 0.8) +
geom_text_repel(aes(x = state_total_allocation, y = perc_contract_spent, label = state_abbr)) +
scale_fill_distiller(palette = "YlOrRd") +
guides(fill = "none") +
scale_x_continuous(labels = scales::unit_format(unit = "$B", scale = 1e-9)) +
scale_y_continuous(labels = scales::percent) +
labs(x = "Total Allocation to State", y = "Dollars Traceable to Individual Projects") +
theme_bw()+
axis_theme
## Joining with `by = join_by(state_abbr)`
state_allocation_check
We can now see how well we can map the amount that states spent across a variety of discrete spending categories.
state_spending_contracts <- project_match_final %>%
group_by(state_abbr_prime, Spending.category) %>%
reframe(spent_dollars = sum(dollars_spent, na.rm = TRUE)) %>%
rename(state_abbr = state_abbr_prime) %>%
mutate(Spending.category = case_when(
Spending.category == "" ~ "Unknown",
TRUE ~ Spending.category
)) %>%
left_join(state_awardees) %>%
group_by(state_abbr) %>%
mutate(all_cats_spend = sum(spent_dollars)) %>%
ungroup() %>%
mutate(perc_contract_spent = spent_dollars / state_total_allocation,
perc_state_spent = all_cats_spend / state_total_allocation) %>%
ggplot() +
geom_col(aes(x = perc_contract_spent, y = reorder(state_abbr, state_total_allocation), fill = Spending.category), color = "black") +
scale_fill_manual(values = cat_scale) +
scale_x_continuous(labels = scales::percent) +
labs(x = "Percent of Contracts Spent on a Specific Category", y = "") +
theme_bw()
## Joining with `by = join_by(state_abbr)`
state_spending_contracts
We now focus specifically on how states acquired PPE.
From earlier, we have our marker of if a project description or spending category might be related to PPE expenditures. We use this indicator to filer our final matched dataset.
ppe_words <- c("ppe", "protective", "mask", "respirator", "nN95")
project_match_final <- project_match_final %>%
mutate(in_out_1 = case_when(
is.na(state_abbr_sub) ~ in_out,
state_abbr_sub == state_abbr_prime ~ "in_state",
state_abbr_sub != state_abbr_prime ~ "out_state"
))
ppe_projects <- project_match_final %>%
mutate(proj_lower = tolower(Project.description),
ppe_match = case_when(
str_detect(proj_lower, paste(ppe_words, collapse = "|")) | Spending.category == "Personal Protective Equipment" ~ 1
)) %>%
filter(ppe_match == 1) %>%
mutate(Spending.category = case_when(
Spending.category == "" ~ "Unknown",
TRUE ~ Spending.category
))
We first examine how PPE projects are distributed across discrete spending categories.
ppe_projects %>%
reframe(dollars_spent = sum(dollars_spent))
## # A tibble: 1 × 1
## dollars_spent
## <dbl>
## 1 5077949705.
States spent a total of 5,077,949,705 (or just over $5 Billion) on potentially PPE related expenditures.
ppe_projects_category <- ppe_projects %>%
group_by(Spending.category) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
ggplot() +
geom_col(aes(reorder(Spending.category, dollars_spent), y = dollars_spent, fill = Spending.category), color = "black") +
coord_flip() +
scale_y_continuous(labels = scales::unit_format(unit = "$B", scale = 1e-9)) +
scale_fill_manual(values = cat_scale) +
labs(y = "Potentially PPE Related Project Spending", x = "") +
guides(fill = "none") +
theme_bw()
ppe_projects_category
Unsurprisingly the “Personal Protective Equipment” spending category describes most of the possible PPE expenditures in our dataset.
As such, we can exlude this category to explore how PPE expenditures might be distributed across other spending categories.
ppe_projects %>%
group_by(Spending.category) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
filter(Spending.category != "Personal Protective Equipment") %>%
ggplot() +
geom_col(aes(reorder(Spending.category, dollars_spent), y = dollars_spent, fill = Spending.category), color = "black") +
coord_flip() +
scale_y_continuous(labels = scales::unit_format(unit = "$B", scale = 1e-9)) +
scale_fill_manual(values = rev(cat_scale)) +
labs(y = "Potentially PPE Related Project Spending", x = "") +
guides(fill = "none") +
theme_bw()
## By State
We can now explore state expenditures on PPE.
state_ppe_spend <- ppe_projects %>%
group_by(Spending.category, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent/state_total_allocation, y = reorder(state_abbr, state_total_allocation), fill = Spending.category), color = "black") +
scale_fill_manual(values = cat_scale) +
scale_x_continuous(labels = scales::percent)+
labs(x = "Percent of State Allocation on Possible PPE Expenditures", y = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
state_ppe_spend
We can also explore how much states spent on within state expenditures, compared to out of state expenditures.
ppe_projects %>%
group_by(in_out, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent/state_total_allocation, y = reorder(state_abbr, state_total_allocation), fill = in_out), color = "black") +
scale_x_continuous(labels = scales::percent)+
labs(x = "Percent of State Allocation on Possible PPE Expenditures", y = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
We now test a slightly different definition of spending contracts of interest, focusing instead on support for manufacturers, producers, and the like.
manf_words <- c("manufactur", "retool", "production", "procurement")
project_manf <- project_match_final %>%
mutate(proj_lower = tolower(Project.description),
manf_match = case_when(
str_detect(proj_lower, paste(manf_words, collapse = "|")) ~ 1
),
ppe_match = case_when(
str_detect(proj_lower, paste(ppe_words, collapse = "|")) | Spending.category == "Personal Protective Equipment" ~ 1
)) %>%
filter(manf_match == 1) %>%
mutate(Spending.category = case_when(
Spending.category == "" ~ "Unknown",
TRUE ~ Spending.category
))
project_manf %>%
filter(is.na(ppe_match)) %>%
group_by(Project.description) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
distinct()
## # A tibble: 18 × 2
## Project.description dollars_spent
## <chr> <dbl>
## 1 "Costs incurred to continue delivering critical government ope… 8702205.
## 2 "Part of EOHED's Partnership for Recovery Plan - Virtual Manuf… 1000000
## 3 "Procurement of Okta Single-Sign On and Multifactor authentica… 18580
## 4 "Purchase Order for Disinfectant Wipes under Emergency Procure… 16029.
## 5 "Reimbursement for costs including staff time for the SEOC and… 106305
## 6 "Reimbursement for procurement of licenses" 757628.
## 7 "Senior Meals\nProvision of meals to older adults (60+) to mee… 564128.
## 8 "The Agriculture Safety Grant Program is available to Michigan… 9361216.
## 9 "The Division of Tourism (Travel Nevada) is the states destina… 2697036.
## 10 "The Governor authorized a direct emergency grant of $2 millio… 3187931
## 11 "The production of a mobile phone application to offer anonymo… 16460
## 12 "The program was established to assist poultry farmers who hav… 211074.
## 13 "This funding supports collaboration between Utah State Univer… 4000000
## 14 "This project supports the procurement of UV Air Filtration Sy… 4468546.
## 15 "This project supports the procurement of influenza vaccines p… 2964640.
## 16 "To fund necessary equipment and technology for Nevada System … 2424798.
## 17 "To support the critical and growing need for food procurement… 8125
## 18 "Video production/marketing campaign to encourage the local re… 304531.
project_manf %>%
reframe(dollars_spent = sum(dollars_spent))
## # A tibble: 1 × 1
## dollars_spent
## <dbl>
## 1 393328066.
States spent $393,328,066 (or just under $400 million) on contracts related to manufacturing/production. Of this, 40,809,231 are not accounted for by the PPE indicator.
manf_proj_check <- project_manf %>%
group_by(Spending.category) %>%
reframe(dollars_spent = sum(dollars_spent)) %>%
ggplot() +
geom_col(aes(reorder(Spending.category, dollars_spent), y = dollars_spent, fill = Spending.category), color = "black") +
coord_flip() +
scale_fill_manual(values = cat_scale) +
scale_y_continuous(labels = scales::unit_format(unit = "$M", scale = 1e-6)) +
labs(y = "Manufacturing, Production, and Procurement Related Spending", x = "") +
guides(fill = "none") +
theme_bw() +
axis_theme
manf_proj_check
The reason we take an expansive approach to defining PPE expenditures is to ensure that we catch any possible matches to our database of confirmed domestic manufacturers.
Based on an analysis of prime-sub awardees that were not matched to any specific contract, we identify 3 contracts between O&M Halyard and NY that were not matched to any possible PPE related expenditures.
Bring in mask data to check against.
box_dir <- "/Users/Nosheal/Library/CloudStorage/Box-Box/COVID 19 Master Folder/Data/Masks/"
box_here <- function(file) {
paste(box_dir, file, sep = "")
}
mask_sample <- readRDS(box_here("final_sample.RDS"))
corp_remove <- c("Limited" ,"Ltd", "LTD", "Incorporated", "Company", "Corporation", "Co", "Inc", "[[:punct:]]", "LLC", "INC", "company", "COMPANY", "CORPORATION")
mask_comps <- mask_sample %>%
mutate(reg_appr = case_when(
niosh_counter == TRUE | fda_counter == TRUE ~ 1
)) %>%
select(dom_useful_num, original_company_name, address, DUNS, corporate_family_dbh, sales_dbh, state_abbr, reg_appr, min_date, desc) %>%
mutate(comp_match = tolower(str_trim(str_remove_all(original_company_name, paste(corp_remove, collapse = "|")))))
simplify <- function(string){
tolower(str_trim(str_remove_all(string, paste(corp_remove, collapse = "|"))))
}
We have to manually correct some entries from our PPE data.
project_manf_match <- ppe_projects%>%
mutate(comp_match = simplify(Sub.recipient)) %>%
mutate(comp_match = simplify(Sub.recipient),
Sub.recipient = str_remove_all(Sub.recipient, "[[:punct:]]")) %>%
mutate(comp_match = case_when(
str_detect("DENTEC", Sub.recipient) ~ "dentec safety specialists",
str_detect("HONEYWELL", Sub.recipient) ~ "honeywell safety products",
str_detect("KIMBERLY CLARK", Sub.recipient) ~ "kimberlyclark",
str_detect("GERSON", Sub.recipient) ~ "louis m gerson",
str_detect("MOLDEX", Sub.recipient) ~ "moldex",
str_detect("HALYARD", Sub.recipient) ~ "om halyard",
str_detect("THOMAS SCIENTIFIC", Sub.recipient) ~ "thomas scientific",
str_detect("MASTER BRANDS", Sub.recipient) ~ "master brands health",
str_detect("FAIRFIELD PROCESSING", Sub.recipient) ~ "fairfield processing",
str_detect("MERROW", Sub.recipient) ~ "merrow ppe",
str_detect("FISHER S|FISHER H", Sub.recipient) ~ "thermo fisher scientific",
str_detect("SKINNY", Sub.recipient) ~ "skinny",
str_detect("MARK ONE", Sub.recipient) ~ "mark one medical",
TRUE ~ comp_match)) %>%
left_join(mask_comps)
## Joining with `by = join_by(comp_match)`
conf_projs <- project_manf_match %>%
mutate(dom_useful_num = case_when(
is.na(dom_useful_num) ~ -1,
TRUE ~ dom_useful_num
)) %>%
group_by(dom_useful_num, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = companies, y = reorder(state_abbr, state_total_allocation), fill = as.factor(dom_useful_num)), color = "black") +
scale_fill_manual(values = c("darkgrey","lightgrey", brewer.pal(8, "Dark2")[1] )) +
labs(x = "Number of projects to known manufacturers / distributors", y = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
conf_projs
project_manf_match %>%
filter(!is.na(dom_useful_num)) %>%
group_by(dom_useful_num, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = companies, y = reorder(state_abbr, state_total_allocation), fill = as.factor(dom_useful_num)), color = "black") +
scale_fill_manual(values = c("lightgrey", brewer.pal(8, "Dark2")[1] )) +
labs(x = "Number of projects to known manufacturers / distributors", y = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
geom_label(aes(x = companies, y = reorder(state_abbr, state_total_allocation), fill = as.factor(dom_useful_num), label = companies), position = "stack", size = 4)
## mapping: x = ~companies, y = ~reorder(state_abbr, state_total_allocation), fill = ~as.factor(dom_useful_num), label = ~companies
## geom_label: parse = FALSE, label.padding = 0.25, label.r = 0.15, label.size = 0.25, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_stack
project_manf_match %>%
mutate(dom_useful_num = case_when(
is.na(dom_useful_num) ~ -1,
TRUE ~ dom_useful_num
)) %>%
group_by(dom_useful_num, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent, y = reorder(state_abbr, state_total_allocation), fill = as.factor(dom_useful_num)), color = "black") +
scale_fill_manual(values = c("darkgrey","lightgrey", brewer.pal(8, "Dark2")[1] )) +
labs(x = "Number of projects to known manufacturers / distributors", y = "") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
conf_dollars <- project_manf_match %>%
filter(!is.na(dom_useful_num)) %>%
group_by(dom_useful_num, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent, y = reorder(state_abbr, state_total_allocation), fill = as.factor(dom_useful_num)), color = "black") +
scale_fill_manual(values = c("lightgrey", brewer.pal(8, "Dark2")[1])) +
labs(x = "Dollars Spent on known manufacturers / distributors", y = "") +
scale_x_continuous(labels = scales::unit_format(unit = "$M", scale = 1e-6)) +
guides(fill = "none") +
theme_bw() +
axis_theme
## Joining with `by = join_by(state_abbr)`
conf_dollars
We can also explore how states spent on companies of different sizes.
projects_match_size <- project_manf_match %>%
mutate(firm_size = case_when(
sales_dbh < 1000000 ~ "Under $1 Mil",
sales_dbh >= 1000000 & sales_dbh < 10000000 ~ "$1M-$10M",
sales_dbh >= 10000000 & sales_dbh < 50000000 ~ "$10M-$50M",
sales_dbh >= 50000000 & sales_dbh < 1000000000 ~ "$50M-$1B",
sales_dbh >= 1000000000 ~ "$1B + "
),
size_pos = case_when(
sales_dbh < 1000000 ~ 5,
sales_dbh >= 1000000 & sales_dbh < 10000000 ~ 4,
sales_dbh >= 10000000 & sales_dbh < 50000000 ~ 3,
sales_dbh >= 50000000 & sales_dbh < 1000000000 ~ 2,
sales_dbh >= 1000000000 ~ 1
),
dom_manf = case_when(
dom_useful_num == 1 ~ "Known Domestic Manufacturer",
dom_useful_num == 0 ~ "Distributor, Wholesaler, or \nForeign Manufacturer"
))
size_col <- c(brewer.pal(9, "Blues")[4], brewer.pal(9, "Blues")[6], brewer.pal(9, "Blues")[8])
manf_labels <- c("Distributor, Foreign Manufacturer, or Wholesaler", "Domestic Manufacturer")
manf_size <- projects_match_size %>%
filter(!is.na(dom_useful_num)) %>%
group_by(dom_manf, state_abbr_prime, firm_size, size_pos) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent, y = reorder(state_abbr, state_total_allocation), fill = reorder(firm_size, size_pos)), color = "black") +
scale_fill_manual(values = rev(size_col)) +
labs(x = "Dollars Spent on known manufacturers / distributors", y = "", fill = "Annual Firm Sales (DB Hoovers)") +
scale_x_continuous(labels = scales::unit_format(unit = "$M", scale = 1e-6)) +
theme_bw() +
facet_wrap(~dom_manf) +
axis_theme
## Joining with `by = join_by(state_abbr)`
manf_size
projects_match_size %>%
filter(!is.na(dom_useful_num)) %>%
group_by(dom_manf, state_abbr_prime, Spending.category) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), companies = n()) %>%
rename(state_abbr = state_abbr_prime) %>%
left_join(state_awardees) %>%
ggplot() +
geom_col(aes(x = dollars_spent, y = reorder(state_abbr, state_total_allocation), fill = Spending.category), color = "black") +
scale_fill_manual(values = cat_scale) +
labs(x = "Dollars Spent on known manufacturers / distributors", y = "", fill = "Annual Firm Sales (DB Hoovers)") +
scale_x_continuous(labels = scales::unit_format(unit = "$M", scale = 1e-6)) +
theme_bw() +
facet_wrap(~dom_manf) +
axis_theme
## Joining with `by = join_by(state_abbr)`
individual_companies <- projects_match_size %>%
filter(!is.na(dom_useful_num)) %>%
group_by(original_company_name, state_abbr_prime) %>%
reframe(dollars_spent = sum(dollars_spent, na.rm = TRUE), awards = n(), dom_useful = dom_useful_num) %>%
rename(state_abbr = state_abbr_prime) %>%
distinct() %>%
mutate(dom_manf = case_when(
dom_useful == 1 ~ "Known Domestic Manufacturer",
dom_useful == 0 ~ "Distributor, Wholesaler, or \nForeign Manufacturer"
)) %>%
ggplot() +
geom_point(aes(x = dollars_spent, y = original_company_name, fill = state_abbr), alpha = 0.7, shape = 21, color = "black", size = 4) +
geom_text_repel(aes(x = dollars_spent, y = original_company_name, label = state_abbr)) +
facet_wrap(~dom_manf) +
scale_x_continuous(labels = scales::unit_format(unit = "$M", scale = 1e-6)) +
guides(fill = "none") +
labs(x = "State Dollars Spent on Potentially PPE related Contracts", y = "Company Name") +
theme_bw() +
axis_theme
individual_companies
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps